Dashboard
In [1]:
Copied!
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, output_file
from bokeh.layouts import column, row
from bokeh.models import OpenURL, TapTool, CustomJS, Div, Callback
from bokeh import events
output_notebook()
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, output_file
from bokeh.layouts import column, row
from bokeh.models import OpenURL, TapTool, CustomJS, Div, Callback
from bokeh import events
output_notebook()
Define utility functions¶
In [2]:
Copied!
from dandi.dandiapi import DandiAPIClient
import json
import pandas as pd
import numpy as np
from urllib.parse import quote, unquote
from dandi.dandiapi import DandiAPIClient
import json
import pandas as pd
import numpy as np
from urllib.parse import quote, unquote
In [3]:
Copied!
def assets_to_df(ds):
    """Tabulate the assets of a dandiset, one row per asset.

    Parameters
    ----------
    ds : object exposing ``get_assets()``; every yielded asset must have
        ``path`` and ``modified`` attributes.

    Returns
    -------
    (df, assets) : tuple
        ``assets`` is the materialised list of assets and ``df`` a DataFrame
        whose row ``i`` describes ``assets[i]``. Columns:

        * one column per ``key-value`` entity found in the file name
          (e.g. ``sub``, ``ses``, ``sample``, ``stain``, ``chunk``),
        * ``subdir``   - label of the innermost ``sub-*`` parent directory,
        * ``path``     - full asset path,
        * ``modality`` - last ``_``-separated token of the file stem, or
          ``None`` when the file is not a ``sub-`` file inside a directory,
        * ``extension`` - everything after the first ``.`` of the file name,
        * ``modified`` - ``asset.modified``.
    """
    assets = list(ds.get_assets())
    asset_info = []
    for asset in assets:
        *parent_dirs, assetname = asset.path.split("/")
        stem = assetname.split(".")[0]

        # "key-value" entities encoded in the file name; only the first "-"
        # separates key from value, so values may themselves contain "-".
        info = {}
        for entity in stem.split("_"):
            if "-" in entity:
                key, value = entity.split("-", 1)
                info[key] = value

        # Subject label taken from the directory structure (innermost wins).
        # Strip only the leading prefix: splitting on "sub-" would truncate a
        # label that itself contains "sub-".
        sub = None
        for directory in parent_dirs:
            if directory.startswith("sub-"):
                sub = directory[len("sub-"):]
        if sub:
            info["subdir"] = sub

        info["path"] = asset.path

        # The modality suffix is only trusted for "sub-" files that live below
        # at least one directory level after the first "sub-" in the path.
        modality = None
        if "_" in assetname and "sub-" in assetname:
            after_first_sub = asset.path.split("sub-", 1)[1]
            if "/" in after_first_sub:
                modality = assetname.split("_")[-1].split(".")[0]
        info["modality"] = modality

        info["extension"] = assetname.partition(".")[2]
        info["modified"] = asset.modified
        asset_info.append(info)

    df = pd.DataFrame(asset_info)
    return df, assets
In [4]:
Copied!
# ---------------------------------------------------------------------------
# Neuroglancer shaders
# ---------------------------------------------------------------------------
# Every shader is the same cubehelix ramp; the variants differ only in which
# output channel (r/g/b) receives each of the three cubehelix components.
# "@C1@", "@C2@", "@C3@" are channel placeholders filled by _cubehelix_shader;
# "%f" is left in place and later receives the default brightness.
_CUBEHELIX_SHADER = """
#uicontrol float brightness slider(min=0.0, max=100.0, default=%f)
void main() {
float x = clamp(toNormalized(getDataValue()) * brightness, 0.0, 1.0);
float angle = 2.0 * 3.1415926 * (4.0 / 3.0 + x);
float amp = x * (1.0 - x) / 2.0;
vec3 result;
float cosangle = cos(angle);
float sinangle = sin(angle);
result.@C1@ = -0.14861 * cosangle + 1.78277 * sinangle;
result.@C2@ = -0.29227 * cosangle + -0.90649 * sinangle;
result.@C3@ = 1.97294 * cosangle;
result = clamp(x + amp * result, 0.0, 1.0);
emitRGB(result);
}
"""


def _cubehelix_shader(first, second, third):
    """Return the shader template with the three components routed to the given channels."""
    return (_CUBEHELIX_SHADER
            .replace("@C1@", first)
            .replace("@C2@", second)
            .replace("@C3@", third))


cubehelix_template = _cubehelix_shader("r", "g", "b")
cubehelix2_template = _cubehelix_shader("g", "r", "b")
cubehelix3_template = _cubehelix_shader("b", "g", "r")
cubehelix4_template = _cubehelix_shader("r", "g", "b")  # same channel order as cubehelix_template
cubehelix5_template = _cubehelix_shader("g", "b", "r")
cubehelix6_template = _cubehelix_shader("b", "r", "g")

# Default value of the shader's "brightness" slider.
DEFAULT_BRIGHTNESS = 50

# Stain name -> shader template (insertion order matches the original mapping).
_STAIN_TEMPLATES = {
    "LEC": cubehelix_template,
    "YO": cubehelix2_template,
    "NN": cubehelix3_template,
    "CR": cubehelix4_template,
    "NPY": cubehelix5_template,
    "IBA1": cubehelix6_template,
    "SST": cubehelix4_template,
    "ABETA": cubehelix_template,
    "PTAU": cubehelix5_template,
    "GFAP": cubehelix3_template,
    "PV": cubehelix5_template,
    "MBP": cubehelix5_template,
    "SMI": cubehelix3_template,
    "SMI312": cubehelix3_template,
    "CD31": cubehelix4_template,
}
ng_colormap = {stain: template % DEFAULT_BRIGHTNESS
               for stain, template in _STAIN_TEMPLATES.items()}

# Shader used for stains that have no entry in ng_colormap, so that a new
# stain name does not abort URL generation with a KeyError.
_FALLBACK_SHADER = cubehelix_template % DEFAULT_BRIGHTNESS

NEUROGLANCER_URL = "https://neuroglancer-demo.appspot.com/"


def to_layout(layers):
    """Build the Neuroglancer ``layout`` entry for a list of layers.

    A single layer is shown in one ``"yz"`` panel. Otherwise every layer gets
    its own ``"yz"`` viewer and one extra viewer blends all layers; the viewers
    are arranged on a near-square grid (``floor(sqrt(n))`` rows).

    Parameters
    ----------
    layers : list of dicts, each with a ``"name"`` key.

    Returns
    -------
    ``"yz"`` for one layer, a ``{"type": "row", ...}`` dict when the grid has a
    single row, else a ``{"type": "column", ...}`` dict of rows.
    """
    import math  # local import: ``math`` is not imported by the notebook's import cells

    if len(layers) == 1:
        return "yz"

    names = [layer["name"] for layer in layers]
    viewers = [{"layers": [name], "layout": "yz", "type": "viewer"} for name in names]
    viewers.append({"layers": names, "layout": "yz", "type": "viewer"})  # blended view

    npanels = len(viewers)
    nrows = math.floor(math.sqrt(npanels))
    ncols = math.ceil(npanels / nrows)

    rows = [{"type": "row", "children": viewers[r * ncols:(r + 1) * ncols]}
            for r in range(nrows)]
    if nrows == 1:
        return rows[0]
    return {"type": "column", "children": rows}


def get_ng_url(zarrmap, sub, sample):
    """Return a Neuroglancer URL that displays every stain of one sample.

    Parameters
    ----------
    zarrmap : dict mapping a stain key to its list of source URLs. The part of
        the key before the first ``_`` selects the shader from ``ng_colormap``;
        unknown stains use the default cubehelix shader.
    sub, sample : values used only to build the layer names
        ``"{sub}-{sample}-{stain}-{number of sources}"``.

    Returns
    -------
    str : ``NEUROGLANCER_URL`` followed by ``#!`` and the percent-encoded JSON state.
    """
    layers = []
    for stain, sources in zarrmap.items():
        layers.append(dict(
            source=sources,
            type="image",
            shader=ng_colormap.get(stain.split("_")[0], _FALLBACK_SHADER),
            name=f"{sub}-{sample}-{stain}-{len(sources)}",
            tab='rendering',
        ))

    state = dict(
        # Voxel size per axis; values kept exactly as in the original state.
        dimensions={"t": [1, "s"],
                    "z": [0.000002285, "m"],
                    "y": [0.0000032309999999999996, "m"],
                    "x": [0.000002285, "m"]},
        displayDimensions=["z", "y", "x"],
        crossSectionScale=50,
        projectionScale=500000,
        layers=layers,
        showDefaultAnnotations=False,
        layerListPanel={'visible': len(layers) > 1},
        layout=to_layout(layers),
    )
    return f"{NEUROGLANCER_URL}#!{quote(json.dumps(state))}"
In [21]:
Copied!
# Categorical palettes available to the plots below.

# 20 distinct colours -- https://sashamaps.net/docs/resources/20-colors/
PALETTE_DISTINCT_20 = [
    '#e6194B', '#3cb44b', '#ffe119', '#4363d8', '#f58231',
    '#911eb4', '#42d4f4', '#f032e6', '#bfef45', '#fabed4',
    '#469990', '#dcbeff', '#9A6324', '#fffac8', '#800000',
    '#aaffc3', '#808000', '#ffd8b1', '#000075', '#a9a9a9',
]

# 15-level colourblind-friendly palette --
# https://jacksonlab.agronomy.wisc.edu/2016/05/23/15-level-colorblind-friendly-palette/
PALETTE_COLORBLIND_15 = [
    "#000000", "#004949", "#009292", "#ff6db6", "#ffb6db",
    "#490092", "#006ddb", "#b66dff", "#6db6ff", "#b6dbff",
    "#920000", "#924900", "#db6d00", "#24ff24", "#ffff6d",
]

# Palette actually used by the plots. The original cell assigned the 20-colour
# list first and immediately overwrote it with this one, so the effective
# value is unchanged.
colormap = PALETTE_COLORBLIND_15
In [22]:
Copied!
def sequence_plot(data, x_range, y_range, title, x_rect, y_rect, color_rect, ncols=60, tooltips=None, callback=None,
x_axis_location="below", mf=None):
nsamples = len(x_range)
if (nsamples/ncols) < 0.8:
ncols = nsamples
if mf is None:
mf = 60/ncols
tools = "save"
if tooltips:
tools += ",hover"
if callback:
tools += ",tap"
p = figure(title=title,
x_axis_location=x_axis_location, tools=tools,
x_range=x_range, y_range=y_range,
tooltips=tooltips)
p.width = max(int(15*ncols*mf), 400)
p.height = max(int(len(y_range)*20*mf), 125)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = f"{int(8*mf)}px"
p.title.text_font_size = f"{int(10*mf)}px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
p.output_backend = "svg"
p.rect(x_rect, y_rect, 0.9, 0.9, source=data,
color=color_rect, line_color=None,
hover_line_color='red') #, hover_color='red') #colors')
if callback is not None:
taptool = p.select(type=TapTool)
taptool.callback = callback
return p
def paged_sequence_plot(data, x_range, y_range, title, x_rect, y_rect, color_rect, ncols=60, tooltips=None, callback=None,
x_axis_location="below", mf=None):
plots = []
numpages = int(np.ceil(len(x_range)/ncols))
for i in range(1, numpages + 1):
if (len(x_range) - ncols * (i-1))/ncols < 0.8:
ncols = min(len(x_range) - ncols * (i-1), ncols)
mod_xrange = x_range[ncols*(i - 1):(ncols*i)]
p = sequence_plot(data, mod_xrange, y_range,
title=f"{title} Coverage: {mod_xrange[0]} -- {mod_xrange[-1]}",
x_rect=x_rect, y_rect=y_rect,
color_rect=color_rect,
x_axis_location=x_axis_location,
ncols=ncols,
tooltips=tooltips,
callback=callback,
mf=mf)
plots.append(p)
return plots
def sequence_plot(data, x_range, y_range, title, x_rect, y_rect, color_rect, ncols=60, tooltips=None, callback=None,
x_axis_location="below", mf=None):
nsamples = len(x_range)
if (nsamples/ncols) < 0.8:
ncols = nsamples
if mf is None:
mf = 60/ncols
tools = "save"
if tooltips:
tools += ",hover"
if callback:
tools += ",tap"
p = figure(title=title,
x_axis_location=x_axis_location, tools=tools,
x_range=x_range, y_range=y_range,
tooltips=tooltips)
p.width = max(int(15*ncols*mf), 400)
p.height = max(int(len(y_range)*20*mf), 125)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = f"{int(8*mf)}px"
p.title.text_font_size = f"{int(10*mf)}px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
p.output_backend = "svg"
p.rect(x_rect, y_rect, 0.9, 0.9, source=data,
color=color_rect, line_color=None,
hover_line_color='red') #, hover_color='red') #colors')
if callback is not None:
taptool = p.select(type=TapTool)
taptool.callback = callback
return p
def paged_sequence_plot(data, x_range, y_range, title, x_rect, y_rect, color_rect, ncols=60, tooltips=None, callback=None,
x_axis_location="below", mf=None):
plots = []
numpages = int(np.ceil(len(x_range)/ncols))
for i in range(1, numpages + 1):
if (len(x_range) - ncols * (i-1))/ncols < 0.8:
ncols = min(len(x_range) - ncols * (i-1), ncols)
mod_xrange = x_range[ncols*(i - 1):(ncols*i)]
p = sequence_plot(data, mod_xrange, y_range,
title=f"{title} Coverage: {mod_xrange[0]} -- {mod_xrange[-1]}",
x_rect=x_rect, y_rect=y_rect,
color_rect=color_rect,
x_axis_location=x_axis_location,
ncols=ncols,
tooltips=tooltips,
callback=callback,
mf=mf)
plots.append(p)
return plots
Work on a dashboard for a specific dandiset¶
In [23]:
Copied!
# Dandiset to summarise. The cells below contain dandiset-specific handling
# for "000108" and "000026".
dandiset = "000108"
#dandiset = "000026"

# List the dandiset's assets once (the cell body used to be duplicated, which
# repeated the remote listing) and tabulate them.
api = DandiAPIClient("https://api.dandiarchive.org/api")
ds = api.get_dandiset(dandiset)
df, assets = assets_to_df(ds)

# Reset the pristine-copy slot used by the dandiset-specific cleanup cell.
df_orig = None
df.head()
Out[23]:
| path | extension | modified | sub | subdir | modality | ses | sample | stain | run | chunk | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | dataset_description.json | json | 2022-07-13 21:40:28.737065+00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | samples.tsv | tsv | 2022-07-13 21:41:07.535853+00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | sub-MITU01/sub-MITU01_sessions.tsv | tsv | 2022-07-13 21:43:56.549551+00:00 | MITU01 | MITU01 | sessions | NaN | NaN | NaN | NaN | NaN |
| 3 | sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU... | ome.zarr | 2022-11-01 18:00:14.302265+00:00 | MITU01 | MITU01 | SPIM | 20210720h20m19s32 | 127 | YO | 1 | 8 |
| 4 | sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU... | ome.zarr | 2022-11-01 18:33:42.951214+00:00 | MITU01 | MITU01 | SPIM | 20210720h20m19s32 | 127 | YO | 1 | 9 |
Make dandiset specific alterations to dataframe¶
In [24]:
Copied!
# Distinct modality labels parsed from the asset file names.
df["modality"].unique()
Out[24]:
array([nan, 'sessions', 'SPIM', 'photo', 'corr'], dtype=object)
In [25]:
Copied!
# Keep a pristine copy of the asset table on the first run and restore from it
# on every later run, so re-executing this cell never compounds its changes.
if df_orig is None:
    df_orig = df.copy()
else:
    df = df_orig.copy()

if dandiset == "000108":
    # Harmonise stain labels that appear under two spellings.
    remap = dict(calretinin='CR', npy='NPY')

    def sample_to_int(x):
        """Return the integer before the first 'R' of a sample label; pass non-strings (e.g. NaN) through."""
        if isinstance(x, str):
            return int(x.split('R')[0])
        return x

    df["stain"] = df["stain"].map(lambda stain: remap.get(stain, stain))
    # Integer conversion of the sample column is currently disabled:
    #df['sample'] = df['sample'].apply(sample_to_int).astype(pd.Int64Dtype())

if dandiset == "000026":
    # Drop derivative assets, except those whose path mentions EPIC or STER.
    is_derivative = df.path.str.contains("derivatives")
    is_epic = df.path.str.contains("EPIC")
    is_ster = df.path.str.contains("STER")
    df = df[~(is_derivative & ~is_epic & ~is_ster)]

df.head()
Out[25]:
| path | extension | modified | sub | subdir | modality | ses | sample | stain | run | chunk | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | dataset_description.json | json | 2022-07-13 21:40:28.737065+00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | samples.tsv | tsv | 2022-07-13 21:41:07.535853+00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | sub-MITU01/sub-MITU01_sessions.tsv | tsv | 2022-07-13 21:43:56.549551+00:00 | MITU01 | MITU01 | sessions | NaN | NaN | NaN | NaN | NaN |
| 3 | sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU... | ome.zarr | 2022-11-01 18:00:14.302265+00:00 | MITU01 | MITU01 | SPIM | 20210720h20m19s32 | 127 | YO | 1 | 8 |
| 4 | sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU... | ome.zarr | 2022-11-01 18:33:42.951214+00:00 | MITU01 | MITU01 | SPIM | 20210720h20m19s32 | 127 | YO | 1 | 9 |
Subject x modality¶
In [26]:
Copied!
# One row per (subject, modality) pair present in the asset table; the
# groupby is only used to obtain the distinct, sorted pairs.
df_sub = (
    df[['sub', 'modality']]
    .dropna()
    .groupby(['sub', 'modality'])
    .count()
    .index
    .to_frame()
)
df_sub
Out[26]:
| sub | modality | ||
|---|---|---|---|
| sub | modality | ||
| MITU01 | SPIM | MITU01 | SPIM |
| photo | MITU01 | photo | |
| sessions | MITU01 | sessions | |
| MITU01h3 | SPIM | MITU01h3 | SPIM |
| corr | MITU01h3 | corr | |
| SChmi53 | SPIM | SChmi53 | SPIM |
| U01hm15x | SPIM | U01hm15x | SPIM |
| mEhm | SPIM | mEhm | SPIM |
In [27]:
Copied!
# Column data for the subject x modality overview plot.
data_sub = {col: df_sub[col].tolist() for col in df_sub.keys()}
x_range = sorted(df_sub['sub'].unique().tolist())
y_range = sorted(df_sub.modality.unique().tolist())

# One colour per modality, in y-axis order. The palette index wraps around so
# that more modalities than palette entries reuse colours rather than raising
# IndexError.
modality_color = {modality: colormap[pos % len(colormap)]
                  for pos, modality in enumerate(y_range)}
data_sub["colors"] = [modality_color[modality] for modality in data_sub["modality"]]
In [28]:
Copied!
# Overview figure: which modalities exist for which subject. It is built and
# shown once (the cell body used to be duplicated, rendering it twice).
p_sub = sequence_plot(
    data_sub, x_range, y_range,
    title="Subjects and modalities",
    x_rect="sub", y_rect="modality",
    color_rect="colors", x_axis_location="above",
    mf=1 if dandiset == "000108" else 2,
)
show(p_sub)
Add error analysis to the plot¶
In [29]:
Copied!
# Toggle for the per-subject error plots. The leading ``True`` is the manual
# switch (set it to False to disable); error plots are only produced for
# dandiset "000108".
ERRORS_PLOT = True and (dandiset == "000108")
In [30]:
Copied!
def format_errors(err_dict):
    """Summarise one error record as a message and a severity code.

    Parameters
    ----------
    err_dict : str
        Python-literal text of a dict with the keys ``"No level 6"``,
        ``"Zeros"``, ``"Different shapes"`` and ``"ClientError"``.

    Returns
    -------
    pandas.Series ``[message, code]`` where ``code`` is
    1 (no level 6), 2 (zeros), 3 (different shapes), 4 (client error) or
    99 (no errors). When several of codes 2-4 apply, the messages are joined
    with newlines and the smallest code is reported. A missing level 6 hides
    all other errors.
    """
    # NOTE(security): eval() executes arbitrary code. This is only acceptable
    # because the text comes from locally generated CSV files; never feed it
    # untrusted input.
    record = eval(err_dict)

    if record["No level 6"]:
        return pd.Series(["No level6", 1])

    lines = []
    codes = []
    if record["Zeros"]:
        codes.append(2)
        zero_list = ",".join(str(item) for item in record["Zeros"])
        lines.append(f"Zeros in: {zero_list}.")
    if record["Different shapes"]:
        codes.append(3)
        lines.append("Chunks with different shapes.")
    if record["ClientError"]:
        codes.append(4)
        lines.append("Client Error")

    if not codes:
        return pd.Series(["No errors", 99])  # 99: larger than any real error code
    return pd.Series(["\n".join(lines), min(codes)])
In [31]:
Copied!
def read_errors(sub, dandiset, prefix="dferr_sub-"):
    """Load the error report of one subject and derive plot-ready columns.

    Parameters
    ----------
    sub : subject label; the report is read from ``f"{prefix}{sub}.csv"``.
    dandiset : dandiset id; for ``"000108"`` the stain labels ``calretinin``
        and ``npy`` are renamed to ``CR`` and ``NPY``.
    prefix : path prefix of the report files. It used to be ignored in favour
        of a hard-coded ``"dferr_sub-"``; the default keeps that behaviour.

    Returns
    -------
    ``None`` when the report file does not exist, otherwise a DataFrame indexed
    by ``(sample, stain, ses)`` with the original ``errors`` column plus

    * ``err_mes`` / ``err_nr`` - message and severity code from ``format_errors``,
    * ``err_col`` - colour of the most severe (smallest) code found for the
      row's ``(sample, stain)`` pair, shared by all of that pair's sessions.
    """
    try:
        df_err = pd.read_csv(f"{prefix}{sub}.csv", dtype=object)
    except FileNotFoundError:
        return None

    df_err = df_err.drop(["chunk", "path"], axis=1)
    if dandiset == "000108":
        remap = dict(calretinin='CR', npy='NPY')
        df_err["stain"] = df_err["stain"].map(lambda stain: remap.get(stain, stain))
    df_err = df_err.set_index(['sample', 'stain', "ses"])

    # format_errors returns a 2-element Series per row -> columns 0 and 1.
    formatted = df_err["errors"].apply(format_errors)
    df_err["err_mes"] = formatted[0]
    df_err["err_nr"] = formatted[1]

    colors_map = {1: "#444444", 2: "#e31a1c", 3: "#fb9a99", 4: "#fdbf6f", 99: "#33a02c"}
    # Smallest code per (sample, stain), broadcast back to every row. This
    # replaces a chained ``.loc[...].at[...] = ...`` assignment, which may
    # write to a temporary copy and leave the frame unchanged.
    worst_code = (pd.to_numeric(df_err["err_nr"])
                  .groupby(level=["sample", "stain"])
                  .transform("min"))
    df_err["err_col"] = [colors_map.get(code) for code in worst_code.tolist()]
    return df_err
In [32]:
Copied!
def _chunk_number(asset):
    """Return the integer that follows ``_chunk-`` in the asset's path."""
    return int(asset.path.split("_chunk-")[1].split("_")[0])


def _zarr_sources(rows):
    """Return ``zarr://`` S3 URLs of the assets behind ``rows``, ordered by chunk number.

    ``rows`` is a slice of ``df``; its index labels are used as positions in
    the module-level ``assets`` list.
    """
    ordered = sorted((assets[idx] for idx in rows.index), key=_chunk_number)
    return [f"zarr://{asset.get_content_url(regex='s3')}" for asset in ordered]


# Build one paged sample x stain plot per subject directory (plus an error
# plot when an error report is available for that subject).
plots = []
for sub, _ in df.groupby("subdir"):
    errors_plot = ERRORS_PLOT
    print("group[0]", sub)

    of_sub = df["sub"] == sub
    # SPIM zarr assets of this subject; used for the chunk counts and the viewer links.
    spim = df[of_sub & (df.modality == "SPIM") & df.extension.str.contains("ome")]

    # Number of chunks per (sample, stain, session); skip subjects without SPIM data.
    mi_chunk = spim.groupby(['sample', 'stain', 'ses']).chunk.count()
    if mi_chunk.empty:
        continue
    dfcat = pd.concat((mi_chunk.index.to_frame(), mi_chunk), axis=1)

    if errors_plot:
        df_err = read_errors(sub=sub, dandiset=dandiset)
        if df_err is None:
            errors_plot = False  # no report for this subject
        else:
            dfcat = pd.concat((dfcat, df_err), axis=1)

    stains = dfcat["stain"].unique().tolist()

    # x axis: samples in order of appearance; dandiset 000108 sorts them.
    # (Previously ``samples`` was left undefined for any other dandiset.)
    samples = dfcat["sample"].unique().tolist()
    if dandiset == "000108":
        samples = sorted(samples)
        if sub == "MITU01":
            # Show the full 1..180 axis even for samples without data --
            # presumably the expected slab count of this subject; TODO confirm.
            samples = sorted(set(str(val) for val in range(1, 181)).union(samples),
                             key=lambda x: int(x.split("R")[0]))

    # Sample -> URL of its photo asset.
    photos = {}
    for idx, sample in df.loc[of_sub & (df.modality == "photo"), "sample"].items():
        photos[sample] = assets[idx].get_content_url(regex='s3')

    if dandiset == "000108":
        # sample -> {layer key -> zarr sources}. When a (sample, stain) pair has
        # repeated chunk ids, each session becomes its own "<stain>_<ses>" layer.
        zarrmap = {}
        for (sample, stain), rows in spim.groupby(['sample', 'stain']):
            per_sample = zarrmap.setdefault(sample, {})
            chunks = rows["chunk"].values
            if len(chunks) != len(np.unique(chunks)):
                print(f"multiple chunks detected for sample: {sample} stain: {stain}")
                for ses, ses_rows in rows.groupby("ses"):
                    per_sample[f"{stain}_{ses}"] = _zarr_sources(ses_rows)
            else:
                per_sample[stain] = _zarr_sources(rows)

        ngurls = {sample: unquote(get_ng_url(layer_map, sub, sample))
                  for sample, layer_map in zarrmap.items()}

    data = dict(
        samples=dfcat['sample'].tolist(),
        stains=dfcat['stain'].tolist(),
        # Palette index wraps around so more stains than colours reuse colours
        # rather than raising IndexError.
        colors=[colormap[stains.index(stain) % len(colormap)]
                for stain in dfcat['stain'].tolist()],
        sessions=dfcat['ses'].tolist(),
        chunks=dfcat['chunk'].tolist(),
        # Empty string (not an <img> pointing at "None") when there is no photo.
        photo=[f'<img src="{photos[sample]}" width="100px"/>' if sample in photos else ""
               for sample in dfcat['sample']],
    )
    if errors_plot:
        data["err_mes"] = dfcat["err_mes"].tolist()
        data["err_col"] = dfcat["err_col"].tolist()
    if dandiset == "000108":
        data["url"] = [f'{ngurls.get(sample)}' for sample in dfcat['sample']]

    p = paged_sequence_plot(data, samples, stains,
                            title=f"Sub {sub} samples",
                            x_rect="samples", y_rect="stains",
                            color_rect="colors", x_axis_location="below",
                            tooltips=[('sample', '@samples'),
                                      ('chunks', '@chunks'),
                                      ('sessions', '@sessions'),
                                      ],
                            # Tapping a rectangle opens the sample's viewer URL.
                            callback=OpenURL(url="@url") if dandiset == "000108" else None,
                            mf=1, ncols=62)
    plots.append(column(p))

    if errors_plot:
        p_err = paged_sequence_plot(data, samples, stains,
                                    title=f"Errors: {sub}",
                                    x_rect="samples", y_rect="stains",
                                    color_rect="err_col", x_axis_location="below",
                                    tooltips=[('sessions', '@sessions'), ('errors', '@err_mes')],
                                    mf=1, ncols=62)
        plots.append(column(p_err))
group[0] MITU01 multiple chunks detected for sample: 10 stain: LEC multiple chunks detected for sample: 10 stain: NN multiple chunks detected for sample: 10 stain: YO multiple chunks detected for sample: 73 stain: LEC multiple chunks detected for sample: 73 stain: NN multiple chunks detected for sample: 73 stain: YO multiple chunks detected for sample: 97 stain: LEC multiple chunks detected for sample: 97 stain: NN multiple chunks detected for sample: 97 stain: YO group[0] MITU01h3 multiple chunks detected for sample: 15 stain: LEC multiple chunks detected for sample: 15 stain: NN multiple chunks detected for sample: 15 stain: YO multiple chunks detected for sample: 16 stain: YO multiple chunks detected for sample: 20 stain: LEC multiple chunks detected for sample: 20 stain: NN multiple chunks detected for sample: 20 stain: YO multiple chunks detected for sample: 4 stain: YO group[0] SChmi53 multiple chunks detected for sample: 28 stain: LEC multiple chunks detected for sample: 28 stain: NN multiple chunks detected for sample: 28 stain: YO multiple chunks detected for sample: 8 stain: LEC multiple chunks detected for sample: 8 stain: NN multiple chunks detected for sample: 8 stain: YO group[0] U01hm15x group[0] mEhm
In [33]:
Copied!
# Write the overview plot followed by all per-subject plots to a standalone
# HTML file and open it once (the cell body used to be duplicated).
output_file(f"{dandiset}-dashboard.html")
show(column([p_sub] + plots))